**********************************
* 4. REGRESSIONS AND SIMULATIONS *
**********************************

*************************************************
* CONTENT:	(1) Running regressions             *
*			(2) Out-of-sample test              *
*           (3) Simulating in the world dataset *
*************************************************


* (1) Running regressions
**************************

set more off

* Loading the WVS analysis file

use WPO_WVS_QOG.dta, clear

* Right-hand side shared by the three models: country-level covariates,
* individual-level covariates, and their interactions.

local rhs wdi_pop_log wdi_popf wdi_pop14 wdi_pop65 vdem_libdem vdem_corr ///
	gdp_log wdi_tele lp_lat_abst wdi_popurb lp_catho80 lp_muslim80 lp_protmg80 ///
	year year_sq female education age urban income ///
	c.education#c.vdem_corr c.education#c.vdem_libdem ///
	c.income#c.vdem_corr c.income#c.vdem_libdem ///
	c.female#c.lp_catho80 c.female#c.lp_muslim80 c.female#c.lp_protmg80 ///
	c.age#c.year

* One OLS model per attitude item, each restricted to its own inclusion flag
* and with standard errors clustered by survey_id:
*   E114 = support for strong leader
*   E117 = support for democracy
*   D059 = support for male leader
* The first model creates table.doc (replace); the next two are appended.

local mode replace
foreach item in E114 E117 D059 {
	reg `item'_clean `rhs' if `item'_inclusion==1, vce(cluster survey_id)
	outreg2 using table.doc, p dec(2) `mode'
	local mode append
}

* (2) Out-of-sample tests
*************************

* Removing 45 random country-years

* Fix the random-number seed so the runiform() draws below (and any later
* random draws in the same run) start from a known state.
* NOTE(review): -sort- breaks ties (e.g. among missing random_* values) with
* its own randomisation, which can change which observation receives which
* draw in later iterations; pin that as well if exact replication is needed.

set seed 12345

* Same specification as the main regressions (without clustered SEs, since
* only the point predictions are used here).

local rhs wdi_pop_log wdi_popf wdi_pop14 wdi_pop65 vdem_libdem vdem_corr ///
	gdp_log wdi_tele lp_lat_abst wdi_popurb lp_catho80 lp_muslim80 lp_protmg80 ///
	year year_sq female education age urban income ///
	c.education#c.vdem_corr c.education#c.vdem_libdem ///
	c.income#c.vdem_corr c.income#c.vdem_libdem ///
	c.female#c.lp_catho80 c.female#c.lp_muslim80 c.female#c.lp_protmg80 ///
	c.age#c.year

* For each item (E114, E117, D059) run 21 hold-out simulations.
* Variables created per item and run i:
*   random_<item>i          uniform draw on rows with <item>_inclusion==1 & group_id==1
*   test_<item>i            1 for the 45 rows with the smallest draw
*   test_<item>_2bi         the flag spread to every row of the same survey_id
*   <item>_pred_testi       prediction from the model fitted without flagged surveys
*   <item>_difference_testi actual minus predicted, on the held-out surveys only

foreach item in E114 E117 D059 {
	forvalues i = 1/21 {

		gen random_`item'`i' = runiform() if `item'_inclusion==1 & group_id==1
		* Missing draws sort last, so the first 45 rows are the selected ones
		sort random_`item'`i'
		gen test_`item'`i' = 1 if _n<46
		bysort survey_id: egen test_`item'_2b`i' = max(test_`item'`i')

		* Fit on everything except the held-out surveys, then predict for all rows
		qui reg `item'_clean `rhs' if `item'_inclusion==1 & test_`item'_2b`i'!=1
		predict `item'_pred_test`i'
		replace `item'_pred_test`i' = . if `item'_inclusion==0

		gen `item'_difference_test`i' = `item'_clean - `item'_pred_test`i' if test_`item'_2b`i'==1

	}
}

* Removing all years of the countries of 15 randomly-selected observations
* (two selected observations may belong to the same country, so up to 15
* countries are held out per run).
*
* Reuses the random_<item>i draws created in the previous test.
* Fixes relative to the earlier version:
*   - the country-level flag is now built from test_<item>bis (the 15-row
*     selection) for all three items; E117 and D059 previously used the
*     45-row flag test_<item>i from the first test by mistake;
*   - the selection keeps 15 rows (_n<=15) instead of 14 (_n<15), matching
*     the stated design and the "_n<46 -> 45 rows" pattern of the first test.

local rhs wdi_pop_log wdi_popf wdi_pop14 wdi_pop65 vdem_libdem vdem_corr ///
	gdp_log wdi_tele lp_lat_abst wdi_popurb lp_catho80 lp_muslim80 lp_protmg80 ///
	year year_sq female education age urban income ///
	c.education#c.vdem_corr c.education#c.vdem_libdem ///
	c.income#c.vdem_corr c.income#c.vdem_libdem ///
	c.female#c.lp_catho80 c.female#c.lp_muslim80 c.female#c.lp_protmg80 ///
	c.age#c.year

* Variables created per item and run i:
*   test_<item>bisi            1 for the 15 rows with the smallest draw
*   test_<item>_3bi            the flag spread to every row of the same country
*   <item>_pred_test2bi        prediction from the model fitted without flagged countries
*   <item>_difference_testbisi actual minus predicted, on the held-out countries only

foreach item in E114 E117 D059 {
	forvalues i = 1/21 {

		sort random_`item'`i'
		gen test_`item'bis`i' = 1 if _n<=15
		bysort country: egen test_`item'_3b`i' = max(test_`item'bis`i')

		qui reg `item'_clean `rhs' if `item'_inclusion==1 & test_`item'_3b`i'!=1
		predict `item'_pred_test2b`i'
		replace `item'_pred_test2b`i' = . if `item'_inclusion==0

		gen `item'_difference_testbis`i' = `item'_clean - `item'_pred_test2b`i' if test_`item'_3b`i'==1

	}
}

* Creating a variable capturing the mean of each simulation
* (one constant per item, test type and run; egen mean() ignores the rows
* where the difference is missing, i.e. rows outside the held-out set)

forvalues i = 1/21 {
	foreach item in E114 E117 D059 {
		foreach t in test testbis {
			egen mean_`item'_difference_`t'`i' = mean(`item'_difference_`t'`i')
		}
	}
}

* Keeping only one line in the dataset and only the relevant variables
* (the means are constant across rows, so the first row carries everything)

keep in 1
gen id=_n
keep mean_* id

* Reshaping the variables to construct the graph:
* from one wide row (suffix 1..21) to 21 rows indexed by occurrence

local stubs
foreach item in E114 E117 D059 {
	local stubs `stubs' mean_`item'_difference_test mean_`item'_difference_testbis
}

reshape long `stubs', i(id) j(occurrence)

* Giving labels to variables (used as legend entries in the graphs)

foreach item in E114 E117 D059 {
	lab var mean_`item'_difference_test "Test 1"
	lab var mean_`item'_difference_testbis "Test 2"
}

* Plotting the results

* Options common to the three panels
local common xlabel(1(2)21) scheme(s2mono) ylabel(-0.50(0.10)0.50) yline(0, lw(thick)) xtitle("Simulation N°") graphregion(color(white))

* Panel 1: strong leader (only this panel carries the y-axis title)
line mean_E114_difference_test mean_E114_difference_testbis occurrence, `common' ytitle("Difference between actual and predicted values") title("Strong leader", color(black))
graph save simulation1.gph, replace

* Panel 2: democracy
line mean_E117_difference_test mean_E117_difference_testbis occurrence, `common' title("Democracy", color(black))
graph save simulation2.gph, replace

* Panel 3: male leader
line mean_D059_difference_test mean_D059_difference_testbis occurrence, `common' title("Male leader", color(black))
graph save simulation3.gph, replace

* Side-by-side figure exported as PNG
graph combine "simulation1.gph" "simulation2.gph" "simulation3.gph", row(1) xsize(8) graphregion(color(white))
graph export outofsample.png, replace

* (3) Simulating in the world dataset
*************************************

* Main-effect covariates (also the variables kept from the WVS file) and the
* interaction terms added to the imputation model.

local main wdi_pop_log wdi_popf wdi_pop14 wdi_pop65 vdem_libdem vdem_corr ///
	gdp_log wdi_tele lp_lat_abst wdi_popurb lp_catho80 lp_muslim80 lp_protmg80 ///
	year year_sq female education age urban income

local inter c.education#c.vdem_corr c.education#c.vdem_libdem ///
	c.income#c.vdem_corr c.income#c.vdem_libdem ///
	c.female#c.lp_catho80 c.female#c.lp_muslim80 c.female#c.lp_protmg80 ///
	c.age#c.year

* The same steps are run for E114, E117 and D059 in turn. The first pass
* appends the original world file; later passes append the file saved by the
* previous pass, so the imputed variables accumulate in
* WPO_world_data_10k_bis.dta.

local world WPO_world_data_10k.dta

foreach item in E114 E117 D059 {

	* Load data
	* (-use- takes the option clear to discard the data in memory; the
	* earlier version passed replace, which is not an option of -use-)

	use WPO_WVS_QOG.dta, clear
	gen WVS=1
	keep `item'_clean `main' `item'_inclusion WVS
	drop if `item'_inclusion!=1
	append using `world'

	* Setting multiple imputation format

	mi set wide
	mi register imputed `item'_clean

	mi tsset, clear
	mi stset, clear

	* Multiple imputation based on OLS regression (10 imputations added)

	mi impute regress `item'_clean `main' `inter', add(10)

	* Drop the rows flagged as WVS above and save the remaining rows

	drop if WVS==1
	save WPO_world_data_10k_bis.dta, replace

	local world WPO_world_data_10k_bis.dta
}

* Selecting one simulation among the 10
*
* Works on the data left in memory by the last imputation pass. For every row
* one uniform draw picks which of the 10 imputed values is kept, and the same
* pick is used for the three items:
*   E114 uses _1_ ... _10_E114_clean
*   E117 uses _11_ ... _20_E117_clean
*   D059 uses _21_ ... _30_D059_clean

* Remove any leftover output variables so the script can be re-run; capture
* keeps this from aborting when a variable does not exist.

foreach item in E114 E117 D059 {
	capture drop `item'_simulation
}

gen random = runiform(0, 1)

* Turn the draw into a bin number 1..10 using half-open intervals
* [0,0.1), [0.1,0.2), ..., [0.9,1]. The earlier strict "<" / ">" comparisons
* left the simulation missing when the draw fell exactly on a boundary.

tempvar pick
gen byte `pick' = min(10, 1 + floor(10*random))

gen E114_simulation = .
gen E117_simulation = .
gen D059_simulation = .

forvalues k = 1/10 {
	local k117 = `k' + 10
	local k059 = `k' + 20
	replace E114_simulation = _`k'_E114_clean if `pick'==`k'
	replace E117_simulation = _`k117'_E117_clean if `pick'==`k'
	replace D059_simulation = _`k059'_D059_clean if `pick'==`k'
}

* Drop every variable whose name starts with an underscore (the imputed
* copies and other mi system variables) and the helper draw

drop _*
drop random

save WPO_world_data_10k_bis.dta, replace



